{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ch05/layer_naive.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MulLayer:\n",
    "    def __init__(self):\n",
    "        self.x = None\n",
    "        self.y = None\n",
    "\n",
    "    def forward(self, x, y):\n",
    "        self.x = x\n",
    "        self.y = y                \n",
    "        out = x * y\n",
    "\n",
    "        return out\n",
    "\n",
    "    def backward(self, dout):\n",
    "        dx = dout * self.y\n",
    "        dy = dout * self.x\n",
    "\n",
    "        return dx, dy\n",
    "\n",
    "\n",
    "class AddLayer:\n",
    "    def __init__(self):\n",
    "        pass\n",
    "\n",
    "    def forward(self, x, y):\n",
    "        out = x + y\n",
    "\n",
    "        return out\n",
    "\n",
    "    def backward(self, dout):\n",
    "        dx = dout * 1\n",
    "        dy = dout * 1\n",
    "\n",
    "        return dx, dy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ch05/buy_apple.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "price: 220\n",
      "dApple: 2.2\n",
      "dApple_num: 110\n",
      "dTax: 200\n"
     ]
    }
   ],
   "source": [
    "apple = 100\n",
    "apple_num = 2\n",
    "tax = 1.1\n",
    "\n",
    "mul_apple_layer = MulLayer()\n",
    "mul_tax_layer = MulLayer()\n",
    "\n",
    "# forward\n",
    "apple_price = mul_apple_layer.forward(apple, apple_num)\n",
    "price = mul_tax_layer.forward(apple_price, tax)\n",
    "\n",
    "# backward\n",
    "dprice = 1\n",
    "dapple_price, dtax = mul_tax_layer.backward(dprice)\n",
    "dapple, dapple_num = mul_apple_layer.backward(dapple_price)\n",
    "\n",
    "print(\"price:\", int(price))\n",
    "print(\"dApple:\", dapple)\n",
    "print(\"dApple_num:\", int(dapple_num))\n",
    "print(\"dTax:\", dtax)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ch05/buy_apple_orange.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "price: 715\n",
      "dApple: 2.2\n",
      "dApple_num: 110\n",
      "dOrange: 3.3000000000000003\n",
      "dOrange_num: 165\n",
      "dTax: 650\n"
     ]
    }
   ],
   "source": [
    "apple = 100\n",
    "apple_num = 2\n",
    "orange = 150\n",
    "orange_num = 3\n",
    "tax = 1.1\n",
    "\n",
    "# layer\n",
    "mul_apple_layer = MulLayer()\n",
    "mul_orange_layer = MulLayer()\n",
    "add_apple_orange_layer = AddLayer()\n",
    "mul_tax_layer = MulLayer()\n",
    "\n",
    "# forward\n",
    "apple_price = mul_apple_layer.forward(apple, apple_num)  # (1)\n",
    "orange_price = mul_orange_layer.forward(orange, orange_num)  # (2)\n",
    "all_price = add_apple_orange_layer.forward(apple_price, orange_price)  # (3)\n",
    "price = mul_tax_layer.forward(all_price, tax)  # (4)\n",
    "\n",
    "# backward\n",
    "dprice = 1\n",
    "dall_price, dtax = mul_tax_layer.backward(dprice)  # (4)\n",
    "dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)  # (3)\n",
    "dorange, dorange_num = mul_orange_layer.backward(dorange_price)  # (2)\n",
    "dapple, dapple_num = mul_apple_layer.backward(dapple_price)  # (1)\n",
    "\n",
    "print(\"price:\", int(price))\n",
    "print(\"dApple:\", dapple)\n",
    "print(\"dApple_num:\", int(dapple_num))\n",
    "print(\"dOrange:\", dorange)\n",
    "print(\"dOrange_num:\", int(dorange_num))\n",
    "print(\"dTax:\", dtax)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ch05/two_layer_net.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys, os\n",
    "sys.path.append(os.pardir)  # 親ディレクトリのファイルをインポートするための設定\n",
    "import numpy as np\n",
    "from common.layers import *\n",
    "from common.gradient import numerical_gradient\n",
    "from collections import OrderedDict\n",
    "\n",
    "\n",
    "\n",
    "class TwoLayerNet:\n",
    "    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):\n",
    "        # 重みの初期化\n",
    "        self.params = {}\n",
    "        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)\n",
    "        self.params['b1'] = np.zeros(hidden_size)\n",
    "        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) \n",
    "        self.params['b2'] = np.zeros(output_size)\n",
    "\n",
    "        # レイヤの生成\n",
    "        self.layers = OrderedDict()\n",
    "        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])\n",
    "        self.layers['Relu1'] = Relu()\n",
    "        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])\n",
    "\n",
    "        self.lastLayer = SoftmaxWithLoss()\n",
    "        \n",
    "    def predict(self, x):\n",
    "        for layer in self.layers.values():\n",
    "            x = layer.forward(x)\n",
    "        \n",
    "        return x\n",
    "        \n",
    "    # x:入力データ, t:教師データ\n",
    "    def loss(self, x, t):\n",
    "        y = self.predict(x)\n",
    "        return self.lastLayer.forward(y, t)\n",
    "    \n",
    "    def accuracy(self, x, t):\n",
    "        y = self.predict(x)\n",
    "        y = np.argmax(y, axis=1)\n",
    "        if t.ndim != 1 : t = np.argmax(t, axis=1)\n",
    "        \n",
    "        accuracy = np.sum(y == t) / float(x.shape[0])\n",
    "        return accuracy\n",
    "        \n",
    "    # x:入力データ, t:教師データ\n",
    "    def numerical_gradient(self, x, t):\n",
    "        loss_W = lambda W: self.loss(x, t)\n",
    "        \n",
    "        grads = {}\n",
    "        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])\n",
    "        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])\n",
    "        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])\n",
    "        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])\n",
    "        \n",
    "        return grads\n",
    "        \n",
    "    def gradient(self, x, t):\n",
    "        # forward\n",
    "        self.loss(x, t)\n",
    "\n",
    "        # backward\n",
    "        dout = 1\n",
    "        dout = self.lastLayer.backward(dout)\n",
    "        \n",
    "        layers = list(self.layers.values())\n",
    "        layers.reverse()\n",
    "        for layer in layers:\n",
    "            dout = layer.backward(dout)\n",
    "\n",
    "        # 設定\n",
    "        grads = {}\n",
    "        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db\n",
    "        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db\n",
    "\n",
    "        return grads"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ch05/gradient_check.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "W1:1.7966944727947356e-06\n",
      "b1:2.3788128390528938e-05\n",
      "W2:6.2129618899126245e-09\n",
      "b2:1.3941309729426087e-07\n"
     ]
    }
   ],
   "source": [
    "import sys, os\n",
    "sys.path.append(os.pardir)  # 親ディレクトリのファイルをインポートするための設定\n",
    "import numpy as np\n",
    "from dataset.mnist import load_mnist\n",
    "\n",
    "# データの読み込み\n",
    "(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)\n",
    "\n",
    "network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)\n",
    "\n",
    "x_batch = x_train[:3]\n",
    "t_batch = t_train[:3]\n",
    "\n",
    "grad_numerical = network.numerical_gradient(x_batch, t_batch)\n",
    "grad_backprop = network.gradient(x_batch, t_batch)\n",
    "\n",
    "for key in grad_numerical.keys():\n",
    "    diff = np.average( np.abs(grad_backprop[key] - grad_numerical[key]) )\n",
    "    print(key + \":\" + str(diff))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ch05/train_neuralnet.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.10748333333333333 0.1064\n",
      "0.90525 0.9047\n",
      "0.9235833333333333 0.9245\n",
      "0.9364666666666667 0.9352\n",
      "0.9448166666666666 0.9436\n",
      "0.9519166666666666 0.9513\n",
      "0.95735 0.9543\n",
      "0.9620333333333333 0.959\n",
      "0.9645833333333333 0.9606\n",
      "0.9675 0.9619\n",
      "0.9677 0.9629\n",
      "0.9703833333333334 0.9637\n",
      "0.9718833333333333 0.9661\n",
      "0.9733333333333334 0.9664\n",
      "0.9764 0.9697\n",
      "0.9770666666666666 0.9694\n",
      "0.9778666666666667 0.969\n"
     ]
    }
   ],
   "source": [
    "import sys, os\n",
    "sys.path.append(os.pardir)\n",
    "\n",
    "import numpy as np\n",
    "from dataset.mnist import load_mnist\n",
    "\n",
    "# データの読み込み\n",
    "(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)\n",
    "\n",
    "network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)\n",
    "\n",
    "iters_num = 10000\n",
    "train_size = x_train.shape[0]\n",
    "batch_size = 100\n",
    "learning_rate = 0.1\n",
    "\n",
    "train_loss_list = []\n",
    "train_acc_list = []\n",
    "test_acc_list = []\n",
    "\n",
    "iter_per_epoch = max(train_size / batch_size, 1)\n",
    "\n",
    "for i in range(iters_num):\n",
    "    batch_mask = np.random.choice(train_size, batch_size)\n",
    "    x_batch = x_train[batch_mask]\n",
    "    t_batch = t_train[batch_mask]\n",
    "    \n",
    "    # 勾配\n",
    "    #grad = network.numerical_gradient(x_batch, t_batch)\n",
    "    grad = network.gradient(x_batch, t_batch)\n",
    "    \n",
    "    # 更新\n",
    "    for key in ('W1', 'b1', 'W2', 'b2'):\n",
    "        network.params[key] -= learning_rate * grad[key]\n",
    "    \n",
    "    loss = network.loss(x_batch, t_batch)\n",
    "    train_loss_list.append(loss)\n",
    "    \n",
    "    if i % iter_per_epoch == 0:\n",
    "        train_acc = network.accuracy(x_train, t_train)\n",
    "        test_acc = network.accuracy(x_test, t_test)\n",
    "        train_acc_list.append(train_acc)\n",
    "        test_acc_list.append(test_acc)\n",
    "        print(train_acc, test_acc)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dezero:Python",
   "language": "python",
   "name": "conda-env-dezero-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
